---
author: "Merging and generating final analysis dataset"
---

# load left-hand-side data

```{r}
# load packages
  source("helper-packages.R")

# increase memory limit
# memory.limit() only has an effect on Windows builds of R older than 4.2.0;
# on other platforms / newer versions it merely emits a warning, so the call
# is skipped there
  if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
    memory.limit(size = 50000)
  }

# load every cleaned left-hand-side survey file and stack them into one dataframe
# (files are read in the order listed, so rows stay ordered by survey number)
  stacked <-
    c(
      # 1 pew global attitudes
      "../cleaned-data/y-1-multi-pew-global-attitudes.rds",
      # 2 fes youth studies
      "../cleaned-data/y-2-multi-fes-youth-studies-see.rds",
      # 3 pulse of europe
      "../cleaned-data/y-3-multi-pulse-of-europe.rds",
      # 4 caucasus barometer
      "../cleaned-data/y-4-multi-caucasus-barometer.rds",
      # 5 world values survey
      "../cleaned-data/y-5-multi-world-values-survey.rds",
      # 6 international social survey program
      "../cleaned-data/y-6-multi-issp.rds",
      # 7 cross-national survey of muslim attitudes
      "../cleaned-data/y-7-multi-cross-national-muslim-attitudes.rds",
      # 8 perception and acceptance of religious diversity among the european population
      "../cleaned-data/y-8-multi-perception-and-acceptance-european-population.rds",
      # 9 middle eastern values survey - ccnsf
      "../cleaned-data/y-9-multi-middle-east-values-study-ccnsf.rds",
      # 10 middle eastern values survey - cpsdc
      "../cleaned-data/y-10-multi-middle-east-values-study-cpsdc.rds",
      # 11 middle eastern values survey - rfapv
      "../cleaned-data/y-11-multi-middle-east-values-study-rfapv.rds",
      # 12 life in transition survey
      "../cleaned-data/y-12-multi-life-in-transition-survey.rds",
      # 13 pew: islam and christianity in sub-saharan africa
      "../cleaned-data/y-13-multi-pew-islam-christianity-sub-saharan-africa.rds",
      # 14 religious belief and national belonging in central and eastern europe
      "../cleaned-data/y-14-multi-pew-religious-belief-and-national-belonging-in-central-and-eastern-europe.rds",
      # 15 latinobarometer
      "../cleaned-data/y-15-multi-latinobarometro.rds",
      # 16 european values study
      "../cleaned-data/y-16-multi-european-values-study.rds",
      # 17 pew: the world's muslims: religion, politics and society
      "../cleaned-data/y-17-multi-pew-worlds-muslims.rds",
      # 18 afrobarometer
      "../cleaned-data/y-18-multi-afrobarometer.rds",
      # 19 second european union minorities and discrimination survey (eu-midis ii), 2016
      "../cleaned-data/y-19-multi-second-european-union-minorities-and-discrimination-survey.rds",
      # 20 fes youth studies (eastern europe and central asia)
      "../cleaned-data/y-20-fes-youth-studies-east-europe-central-asia.rds",
      # 21 eurobarometer (candidate countries eurobarometer)
      "../cleaned-data/y-21-multi-candidate-eurobarometer.rds",
      # 22 arab barometer
      "../cleaned-data/y-22-multi-arab-barometer.rds",
      # 23 eurobarometer (standard and special eurobarometer)
      "../cleaned-data/y-23-multi-eurobarometer.rds",
      # 24 arab opinion index
      "../cleaned-data/y-24-multi-arab-opinion-index.rds",
      # 25 cronos
      "../cleaned-data/y-25-multi-cronos.rds"
    ) %>%
    # read each file into a list of dataframes ...
    lapply(readRDS) %>%
    # ... and stack them row-wise (columns are matched by name)
    bind_rows()
```  

# load right-hand-side data
  
```{r}  
# each right-hand-side (covariate) source is read from its cleaned .rds file
# into its own *_raw object; the numbers match the x-<n>- file prefixes

# 1 nelda (elections)
  nelda_raw <- readRDS("../cleaned-data/x-1-nelda.rds")

# 2 countries by region
  regions_raw <- readRDS("../cleaned-data/x-2-country-regions.rds")

# 3 vparty
  vparty_raw <- readRDS("../cleaned-data/x-3-vparty.rds")

# 4 largest religion
  cnse_raw <- readRDS("../cleaned-data/x-4-cnse-largest-relig.rds")

# 5 religion and state discrimination indexes
  ras_raw <- readRDS("../cleaned-data/x-5-relig-and-state.rds")

# 6 vdem regime data
  vdem_raw <- readRDS("../cleaned-data/x-6-vdem.rds")

# 7 un gdp per capita
  ungdp_raw <- readRDS("../cleaned-data/x-7-un-gdppc.rds")

# 8 religious fractionalization/polarization
  rf_raw <- readRDS("../cleaned-data/x-8-rel-fractionalization-polarization.rds")

# 9 religious conflict
  relac_raw <- readRDS("../cleaned-data/x-9-relac.rds")

# 10 dalp
  dalp_raw <- readRDS("../cleaned-data/x-10-dalp.rds")

# 11 dpi pr/plurality
  dpi_pr_plur_raw <- readRDS("../cleaned-data/x-11-smp-pr.rds")

# 12 dpi federalism
  dpi_federalism_raw <- readRDS("../cleaned-data/x-12-dpi-federalism.rds")

# 13 elf indexes
  elf_raw <- readRDS("../cleaned-data/x-13-ethlang-fractionalization.rds")
```

# reshape, from wide to long

```{r}
# names of the respondent-level (non-question) columns
  id_vars <- c(
    # survey provenance
    "resp_source", "resp_round",
    "resp_original_data_url", "resp_questionnaire_db_link",
    "resp_survey_mode",
    # country
    "resp_country_original", "resp_country_common",
    # interview timing
    "resp_interview_date",
    "resp_interview_start_date", "resp_interview_end_date",
    # respondent characteristics
    "resp_religion", "resp_denomination",
    "resp_age", "resp_education_original",
    "resp_female", "resp_rural"
  )
 
# reshape, such that each individual respondent has a unique code but each row represents one question

# respondent-level columns that are carried along unchanged (i.e. neither
# renamed nor pivoted); resp_id is created inside the pipeline below
# NOTE(review): id_vars (declared above) additionally lists
# "resp_questionnaire_db_link", which is NOT excluded from the reshape here;
# the original exclusion set is kept as is -- confirm whether that is intended
  respondent_level_cols <-
    c("resp_id",
      "resp_source",
      "resp_round",
      "resp_original_data_url",
      "resp_survey_mode",
      "resp_country_original",
      "resp_country_common",
      "resp_interview_date",
      "resp_interview_start_date",
      "resp_interview_end_date",
      "resp_religion",
      "resp_denomination",
      "resp_age",
      "resp_education_original",
      "resp_female",
      "resp_rural")

  long_stacked <-
    stacked %>%
    mutate(

      # for consistency, add TARGET/TYPE info to the religiosity and gentrust info vars
        resp_gentrust_qinfo = paste0(resp_gentrust_qinfo, "; TARGET: NA-GENTRUST; TYPE: General trust"),
        resp_religiosity_qinfo = paste0(resp_religiosity_qinfo, "; TARGET: NA-RELIGIOSITY; TYPE: Religiosity"),
        resp_religiosity_original = as.character(resp_religiosity_original)) %>%

    rename(

      # rename gentrust vars to question slot 80 so they reshape like the soc_dist questions
        resp_soc_dist_80_qinfo = resp_gentrust_qinfo,
        resp_soc_dist_80_original = resp_gentrust_original,
        resp_soc_dist_80_bin_recode = resp_gentrust_bin_recode,

      # rename religiosity vars to question slot 81 for the same reason
        resp_soc_dist_81_qinfo = resp_religiosity_qinfo,
        resp_soc_dist_81_original = resp_religiosity_original,
        resp_soc_dist_81_bin_recode = resp_religiosity_recode) %>%
    mutate(

      # generate unique id for individual respondents
      # (seq_len() rather than 1:nrow(.), which would yield c(1, 0) on an empty dataframe)
      resp_id = seq_len(nrow(.))) %>%

  # perform the reshape
  # step 1: move the question number to the end of the column name,
  #         e.g. resp_soc_dist_80_qinfo -> resp_soc_dist_qinfo_80
    rename_with(
      ~str_replace(., "_(\\d+)(_.*)", "\\2_\\1"),
      -all_of(respondent_level_cols)) %>%

  # step 2: pivot to one row per respondent/question; the trailing number
  #         becomes resp_soc_dist_qnum, the rest of the name becomes the column
    pivot_longer(
      cols = -all_of(respondent_level_cols),
      names_to = c(".value", "resp_soc_dist_qnum"),
      names_pattern = "(.*)_(\\d+$)") %>%

  # compute proportion of NA responses for each source/round/country/question
  group_by(resp_source, resp_round, resp_country_common, resp_soc_dist_qnum) %>%
    mutate(proportion_of_question_na = mean(is.na(resp_soc_dist_bin_recode))) %>%
  ungroup() %>%

  # drop source/round/country/question cells that are entirely NA (because the question wasn't asked)
  filter(proportion_of_question_na < 1)
  
# split the packed question-information string into its five ";"-separated
# fields, strip the field labels, and derive a few helper variables
  long_stacked_tidied <-
    long_stacked %>%
    separate(
      col = resp_soc_dist_qinfo,
      into = c("qinfo_number", "qinfo_text", "qinfo_roptions", "qinfo_target", "qinfo_type"),
      sep = ";") %>%
    mutate(

      # remove the leading field label from each piece and trim whitespace
      qinfo_number = str_trim(str_remove(qinfo_number, "NUM: ")),
      qinfo_text = str_trim(str_remove(qinfo_text, "QTEXT:")),
      qinfo_target = str_trim(str_remove(qinfo_target, "TARGET: ")),
      qinfo_type = str_trim(str_remove(qinfo_type, "TYPE: ")),

      # response options: drop the label, then turn " +" separators into "; "
      qinfo_roptions =
        str_replace_all(
          str_trim(str_remove(qinfo_roptions, "ROPTIONS: ")),
          " \\+",
          "; "),

      # missing survey rounds become an empty string
      resp_round = ifelse(is.na(resp_round), "", resp_round),

      # collapse every question type mentioning "Distance" into one group
      qinfo_type_group =
        if_else(str_detect(qinfo_type, "Distance"), "Distance", qinfo_type),

      # education group is the text inside the first [...] of the original education string
      resp_highest_edu_group = str_extract(resp_education_original, "(?<=\\[).+?(?=\\])"),

      # small territories correction: fold territories into the listed country
      resp_country_common =
        case_when(
          resp_country_common == "Puerto Rico" ~ "US",
          resp_country_common %in% c("Macao", "Hong Kong") ~ "China",
          TRUE ~ resp_country_common))
``` 

# generate indicators for largest-religion member and different-religion matches

```{r}
# attach country-level religion and region information, then flag members of
# the country's largest religion
  long_stacked_w_largestrel <-
    long_stacked_tidied %>%

  # largest religion in each country
    left_join(
      cnse_raw,
      by = c("resp_country_common" = "cnse_common_country_name")) %>%

    mutate(

    # 1 if the respondent's religion equals the country's largest religion,
    # 0 if not, NA if either side is missing
      resp_member_largest_religion =
        as.numeric(resp_religion == cnse_largest_religion),

    # "Other religion" respondents cannot be classified reliably when the
    # largest religion is itself one of these residual-type categories, so set
    # them to NA (note: Shintoism is reported by very few respondents in Japan)
      resp_member_largest_religion =
        case_when(
          resp_religion == "Other religion" &
            cnse_largest_religion %in% c("Animist/indigenous/traditional", "Shinto", "Other religion") ~ NA_real_,
          TRUE ~ resp_member_largest_religion)) %>%

  # categorical variable for global regions
    left_join(
      regions_raw,
      by = c("resp_country_common" = "regions_country_common"))
```

# for each row, define whether individual is a religious outgroup relative to the target religion in the question

```{r}
# define the (mis)matches: for every respondent/question row, flag whether the
# respondent is a religious outgroup relative to the question's target group
# (qinfo_target), then drop country/survey/round/question cells in which no
# respondent has a recorded religion
#
# NOTE(review): the denomination rules below use `resp_denomination != "<sect>"`.
# That comparison is NA when resp_denomination is missing, and case_when() treats
# an NA condition as "no match", so respondents with a missing denomination fall
# through to 0 for the sect-specific targets -- confirm this is intended for
# respondents outside the target's religion.
  long_stacked_w_largestrel_w_matches <- 
    long_stacked_w_largestrel %>% 
    mutate(
      
    # binary variable for NA response to the outcome question (applies to gentrust and religiosity too) 
      resp_response_na = 
        case_when(
          is.na(resp_soc_dist_bin_recode) ~ 1,
          TRUE ~ 0),
        
    # binary variable for whether respondent is a religious outgroup vis-a-vis the target in the question;
    # conditions are evaluated in order and the first match wins
      resp_is_outgroup =
        case_when(
          # different religion: holds for everyone who declares any religion
          # (respondents without a religion are reset to 0 further below)
            qinfo_target == "Different religion" ~ 1,
      
          # different christian sect; applies only to christians
            qinfo_target == "Different sect, Christian" & resp_religion == "Christian" ~ 1,
          
          # different muslim sect; applies only to muslims
            qinfo_target == "Different sect, Muslim" & resp_religion == "Muslim" ~ 1,
      
          # different religious sect; applies to everyone who declares any religion
            qinfo_target == "Different sect, general" ~ 1,
          
          # shia: applies to everyone who isn't shia (could be non-muslim respondent too, therefore; BUT must exclude muslims who do not declare a sect)
            qinfo_target == "Shia" & 
            resp_denomination != "Shia" & 
            (!resp_denomination %in% c(
              "Islam; nfd", 
              "Muslim-not specified (option volunteered in Tunisia and Turkey)", 
              "Muslim (sect not otherwise specified)")) ~ 1,
          
          # sunni: applies to everyone who isn't sunni (could be non-muslim respondent too, therefore; BUT must exclude muslims who do not declare a sect,
          # as well as the volunteered Hanafi/Shafi answers listed here)
            qinfo_target == "Sunni" & 
            resp_denomination != "Sunni" & 
            (!resp_denomination %in% c(
              "Islam; nfd", 
              "Muslim-not specified (option volunteered in Tunisia and Turkey)", 
              "Muslim (sect not otherwise specified)", 
              "Volunteered: Hanafi", 
              "Volunteered: Shafi")) ~ 1,
          
          # alawi: applies to everyone who isn't alawi (could be non-muslim respondent too, therefore; BUT must exclude muslims who do not declare a sect)
            qinfo_target == "Alawi" & 
            resp_denomination != "Alawi" & 
            (!resp_denomination %in% c(
              "Islam; nfd", 
              "Muslim-not specified (option volunteered in Tunisia and Turkey)", 
              "Muslim (sect not otherwise specified)")) ~ 1,
          
          # orthodox: applies to everyone who isn't orthodox (could be non-christian respondent too, therefore; BUT must exclude christians who do not declare a sect)
            qinfo_target == "Orthodox" & 
            resp_denomination != "Orthodox" & 
            (!resp_denomination %in% c(
              "Don't know (DO NOT READ)", 
              "No particular denomination", 
              "Just a Christian", 
              "Something else/other", 
              "Refused (DO NOT READ)")) ~ 1,
          
          # protestant: applies to everyone who isn't protestant (could be non-christian respondent too, therefore), except the two denominations listed here
            qinfo_target == "Protestant" & 
            resp_denomination != "Protestant" & 
            (!resp_denomination %in% c(
              "Seventh-day Adventist Church", 
              "Israelita Nuevo Pacto Universal (Peru)")) ~ 1,

          # jehovah's witness: applies to everyone who isn't jehovah's witness (could be non-christian respondent too, therefore), except the unspecific "Other Christian Church or group" answer
            qinfo_target == "Jehovahs Witness" & 
            resp_denomination != "Jehovahs Witness" & 
            (!resp_denomination %in% c(
              "Other Christian Church or group")) ~ 1,

          # molokan: applies to everyone who isn't molokan (could be non-christian respondent too, therefore), except the unspecific "Other Christian Church or group" answer
            qinfo_target == "Molokan" & 
            resp_denomination != "Molokan" & 
            (!resp_denomination %in% c(
              "Other Christian Church or group")) ~ 1,
          
          # catholic: applies to everyone who isn't catholic (could be non-christian respondent too, therefore; BUT must exclude christians who do not declare a sect)
            qinfo_target == "Roman Catholic" & 
            resp_denomination != "Roman Catholic" & 
            (!resp_denomination %in% c(
              "Don't know (DO NOT READ)", 
              "No particular denomination", 
              "Just a Christian", 
              "Something else/other", 
              "Refused (DO NOT READ)")) ~ 1,
          
          # jewish: every religious person who isn't jewish
            qinfo_target == "Jewish" & resp_religion != "Jewish" ~ 1,
          
          # christian: every religious person who isn't christian
            qinfo_target == "Christian" & resp_religion != "Christian" ~ 1,
          
          # muslim: every religious person who isn't muslim
            qinfo_target == "Muslim" & resp_religion != "Muslim" ~ 1,
          
          # hindu: every religious person who isn't hindu
            qinfo_target == "Hindu" & resp_religion != "Hindu" ~ 1,
          
          # buddhist: every religious person who isn't buddhist
            qinfo_target == "Buddhist" & resp_religion != "Buddhist" ~ 1,
          
          # all else defaults to zero for this var
            TRUE ~ 0),
      
    # anyone whose religion is missing can't have a religious outgroup: reset to 0
    # (per the original note this covers non-declarers, atheists and agnostics --
    # presumably those are coded NA in resp_religion upstream; verify)
      resp_is_outgroup = 
        case_when(
          is.na(resp_religion) ~ 0,
          TRUE ~ resp_is_outgroup),
      
    # generate binary variable for declaring any religion (zero otherwise)
      resp_has_religion = (!is.na(resp_religion))*1) %>% 
    
  # compute the share of respondents with a non-missing resp_religion by country/survey/round/question
  ############## CHECK THIS AGAIN
    group_by(resp_country_common, resp_source, resp_round, qinfo_number, qinfo_type_group) %>%
      mutate(mean_resp_has_religion = mean(resp_has_religion)) %>% 
    ungroup() %>%

  # remove observations where country/survey/round/question is wholly NA for resp_religion
  # (i.e. the share of respondents with a religion is exactly zero)
    filter(mean_resp_has_religion != 0)
```

# generate the "main date" variable and the date-sensitivity variable

```{r}
# derive one interview date per row plus a measure of how uncertain it is
  long_stacked_w_largestrel_w_matches_w_dates <-
    long_stacked_w_largestrel_w_matches %>%
    mutate(

    # main date: the exact interview date where it is known; otherwise the
    # midpoint of the fieldwork period (rounded down to a whole day)
      resp_main_date =
        case_when(
          !is.na(resp_interview_date) ~ resp_interview_date,
          TRUE ~
            resp_interview_start_date +
              floor((resp_interview_end_date - resp_interview_start_date) / 2)),

    # length of the fieldwork period in days, which is only relevant when the
    # midpoint had to be used; zero when the exact interview date is known
      resp_interview_start_end_date_range =
        case_when(
          !is.na(resp_interview_date) ~ 0,
          TRUE ~ as.numeric(resp_interview_end_date - resp_interview_start_date)))
```

# merge nelda data  

```{r}
# two copies of the nelda data for merging, with every column suffixed so the
# two sets of covariates can sit side by side:
#   *_LAST for the last election before the response,
#   *_NEXT for the next election after the response
  nelda_last <- rename_with(nelda_raw, ~ paste0(.x, "_LAST"))

  nelda_next <- rename_with(nelda_raw, ~ paste0(.x, "_NEXT"))
  
# replace infinite values (Inf / -Inf) in x with NA; all other values,
# including existing NAs, are returned unchanged
  inf_na <- function(x) {
    replace(x, is.infinite(x), NA)
  }

# find, for every respondent in one country, the closest election after and
# before the interview
#
# country: value of resp_country_common / nelda_country_name to process
# df:      respondent-level dataframe containing resp_country_common and
#          resp_main_date
#
# returns (invisibly) the rows of df for that country with two added columns:
#   next_election - earliest election date strictly after resp_main_date
#   last_election - latest election date strictly before resp_main_date
# either one is NA when no such election exists in nelda_raw
  elec_dist <-
    function(country = NULL, df = long_stacked_w_largestrel_w_matches_w_dates) {

      # all election dates recorded for this country
      country_elections <-
        nelda_raw %>%
        filter(nelda_country_name == country) %>%
        pull(nelda_election_date)

      # work date by date: every row in a group shares the same resp_main_date,
      # so the first value stands in for the whole group. min()/max() of an
      # empty set return +/-Inf (with a warning), which inf_na() turns into NA
      with_elections <-
        df %>%
        filter(resp_country_common == country) %>%
        group_by(resp_main_date) %>%
        mutate(
          next_election =
            inf_na(min(country_elections[country_elections > resp_main_date[1]])),
          last_election =
            inf_na(max(country_elections[country_elections < resp_main_date[1]]))) %>%
        ungroup()

      invisible(with_elections)
    }

# apply elec_dist() to each country in turn and row-bind the per-country results
# (warnings/messages are silenced; min()/max() warn whenever a respondent has
# no later/earlier election)
  long_stacked_w_largestrel_w_matches_w_dates_elecs <-
    suppressWarnings(
      suppressMessages(
        long_stacked_w_largestrel_w_matches_w_dates$resp_country_common %>%
          unique() %>%
          map_dfr(elec_dist)))
```  
  
# generate treatment variables  
  
```{r}  
# build the election-proximity treatment variables from last_election,
# next_election and resp_main_date; windows are 91 / 182 / 274 days, labelled
# 3 / 6 / 9 months. Variables are defined and then overwritten in sequence
# inside one mutate(), so the order of the statements matters.
  long_stacked_w_largestrel_w_matches_w_dates_elecs_tranforms <- 
    long_stacked_w_largestrel_w_matches_w_dates_elecs %>% 
    mutate(
      
  # generate "raw inputs"
      
    # num of days till next election (NA when no next election was found)
      x_num_days_to_next_election = 
        next_election - resp_main_date,
      
    # num of days since last election (NA when no last election was found)
      x_num_days_since_last_election = 
        resp_main_date - last_election,
    
    # cutoff for the end of the nelda series: 3 months (91 days) before 2020-12-31
      cutoff_next_election_3m = as.Date("2020-12-31") - 91, 

    # cutoff for the end of the nelda series: 6 months (182 days) before 2020-12-31
      cutoff_next_election_6m = as.Date("2020-12-31") - 182, 

    # cutoff for the end of the nelda series: 9 months (274 days) before 2020-12-31
      cutoff_next_election_9m = as.Date("2020-12-31") - 274, 

  # pre-post variables  
  
    # was survey within 3 months of an election on either side?
    # (NA when one side is unknown and the other side is not within the window)
      x_within_3_months_pre_post_election = 
        ifelse(x_num_days_since_last_election < 91 | x_num_days_to_next_election < 91, 1, 0),
        
    # was survey within 6 months of an election on either side?
      x_within_6_months_pre_post_election = 
        ifelse(x_num_days_since_last_election < 182 | x_num_days_to_next_election < 182, 1, 0),

    # was survey within 9 months of an election on either side?
      x_within_9_months_pre_post_election = 
        ifelse(x_num_days_since_last_election < 274 | x_num_days_to_next_election < 274, 1, 0),
  
    # endpoints correction: 3 months pre/post
    # when no next election is recorded but the interview took place before the
    # cutoff, the still-missing indicator is set to 0 instead of NA
      x_within_3_months_pre_post_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & # ensures countries with no elections aren't captured  
          is.na(x_within_3_months_pre_post_election) &  
          resp_main_date < cutoff_next_election_3m ~ 0,
          TRUE ~ x_within_3_months_pre_post_election),  
  
    # endpoints correction: 6 months pre/post
      x_within_6_months_pre_post_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & # ensures countries with no elections aren't captured  
          is.na(x_within_6_months_pre_post_election) &  
          resp_main_date < cutoff_next_election_6m ~ 0,
          TRUE ~ x_within_6_months_pre_post_election), 
  
    # endpoints correction: 9 months pre/post
      x_within_9_months_pre_post_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & # ensures countries with no elections aren't captured  
          is.na(x_within_9_months_pre_post_election) &  
          resp_main_date < cutoff_next_election_9m ~ 0,
          TRUE ~ x_within_9_months_pre_post_election),   
  
  # pre variables
  
    # within 3 months before an election
      x_within_3_months_pre_election =
        ifelse(x_num_days_to_next_election < 91, 1, 0),

    # within 6 months before an election
      x_within_6_months_pre_election =
        ifelse(x_num_days_to_next_election < 182, 1, 0),
      
    # within 9 months before an election
      x_within_9_months_pre_election =
        ifelse(x_num_days_to_next_election < 274, 1, 0),
  
    # endpoints correction: 3 months pre (same rule as for the pre/post variables)
      x_within_3_months_pre_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & 
          is.na(x_within_3_months_pre_election) &  
          resp_main_date < cutoff_next_election_3m ~ 0,
          TRUE ~ x_within_3_months_pre_election), 
  
    # endpoints correction: 6 months pre
      x_within_6_months_pre_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & 
          is.na(x_within_6_months_pre_election) &  
          resp_main_date < cutoff_next_election_6m ~ 0,
          TRUE ~ x_within_6_months_pre_election), 
  
    # endpoints correction: 9 months pre
      x_within_9_months_pre_election = 
        case_when(
          is.na(next_election) &
          !is.na(last_election) & 
          is.na(x_within_9_months_pre_election) &  
          resp_main_date < cutoff_next_election_9m ~ 0,
          TRUE ~ x_within_9_months_pre_election),   

  # post variables (no endpoints correction is applied to these)
  
    # within 3 months after an election
      x_within_3_months_post_election =
        ifelse(x_num_days_since_last_election < 91, 1, 0),
    
    # within 6 months after an election
      x_within_6_months_post_election =
        ifelse(x_num_days_since_last_election < 182, 1, 0),
      
    # within 9 months after an election
      x_within_9_months_post_election =
        ifelse(x_num_days_since_last_election < 274, 1, 0),
          
  # share of the election cycle elapsed at interview time:
  # days since last election divided by days between last and next election
      x_percent_through_election = 
        (as.numeric(x_num_days_since_last_election)/as.numeric((next_election - last_election))))


### RUN FURTHER CHECKS
# ble <- long_stacked_w_largestrel_w_matches_w_dates_elecs_tranforms %>%
#   sample_n(100) %>% 
#   select(resp_country_common, resp_main_date, last_election, next_election, cutoff_next_election_6m, x_within_6_months_pre_post_election)
```

# merge in additional data and transform demographic variables

```{r}
# merge country-/election-level covariates onto the respondent-question rows
# and recode the demographic variables (education, age, survey mode)
  long_stacked_w_largestrel_w_matches_w_dates_elecs_tranforms_neldacovs_extras <- 
    long_stacked_w_largestrel_w_matches_w_dates_elecs_tranforms %>%
  
  # generate year variable based on last election; this is the year key used
  # for all year-specific merges below
    mutate(last_election_year = year(last_election)) %>%
  
  # merge in gdp per capita data
    left_join(ungdp_raw, by = c("resp_country_common" = "un_country_common", "last_election_year" = "year")) %>%
  
  # merge in regimes data
    left_join(vdem_raw, by = c("resp_country_common" = "vdem_country_common", "last_election_year" = "year")) %>%

  # generate unique id for country/election cycle
  # (group_by() + cur_group_id() replaces group_indices(., ...), whose use with
  # grouping arguments is deprecated; the ids are numbered the same way)
    group_by(last_election, resp_country_common) %>%
    mutate(country_election_cycle_id = cur_group_id()) %>%
    ungroup() %>%

  # general education dummy variables; each is NA when the education group is
  # none of "No education" / "Primary" / "College"
    mutate(
      resp_edu_1_noedu =
        case_when(
          resp_highest_edu_group %in% c("No education") ~ 1,
          resp_highest_edu_group %in% c("Primary", "College") ~ 0,
          TRUE ~ NA_real_),
      resp_edu_2_primary_to_college =
        case_when(
          resp_highest_edu_group %in% c("Primary") ~ 1,
          resp_highest_edu_group %in% c("No education", "College") ~ 0,
          TRUE ~ NA_real_),
      resp_edu_3_college =
        case_when(
          resp_highest_edu_group %in% c("College") ~ 1,
          resp_highest_edu_group %in% c("No education", "Primary") ~ 0,
          TRUE ~ NA_real_)) %>%

  # recode age; for ages given as range, take middle of the range
  # (the age string is split on "-", "+" or " or over"; a single number or an
  # open-ended range keeps its one numeric value; if neither part is numeric
  # the row mean is NaN, which is.na() below treats as missing)
    separate(resp_age, into = c("temp_a", "temp_b"), sep = "-|\\+|\\-| or over") %>%
    mutate(across(c(temp_a, temp_b), as.numeric)) %>%
    mutate(resp_age_recode = rowMeans(select(., temp_a, temp_b), na.rm = TRUE)) %>%
    select(-c(temp_a, temp_b)) %>%
  
    mutate(

  # generate var for being surveyed face to face (zero for the other listed
  # modes, NA for anything else)
    resp_surveyed_in_person =
      case_when(
        resp_survey_mode %in% c("in-person", "in-personsu") ~ 1,
        resp_survey_mode %in% c("internet", "internet/mail", "mail", "phone") ~ 0,
        TRUE ~ NA_real_)) %>%

  # merge nelda covariates for the last and for the next election
    left_join(
      nelda_last, 
      by = c("resp_country_common" = "nelda_country_name_LAST", "last_election" = "nelda_election_date_LAST")) %>% 
    left_join(
      nelda_next, 
      by = c("resp_country_common" = "nelda_country_name_NEXT", "next_election" = "nelda_election_date_NEXT")) %>%   
  
  # merge vparty covariates
    left_join(vparty_raw, by = c("resp_country_common" = "vparty_country_common", "last_election_year" = "vparty_year")) %>%

  # merge religion and state covariates     
    left_join(ras_raw, by = c("resp_country_common" = "ras_country_common", "last_election_year" = "ras_year")) %>% 

  # merge religious fractionalization covariates (country-level, no year key)
    left_join(rf_raw, by = c("resp_country_common" = "relfp_county_common")) %>%

  # merge ethnic and linguistic fractionalization covariates (country-level, no year key)
    left_join(elf_raw, by = c("resp_country_common" = "adekw_country_common")) %>%
  
  # merge proportional representation and plurality for house elections covariates
    left_join(dpi_pr_plur_raw, by = c("resp_country_common" = "dpi_country_common", "last_election_year" = "dpi_year")) %>%
  
  # merge federalism covariate
    left_join(dpi_federalism_raw, by = c("resp_country_common" = "dpi_country_common", "last_election_year" = "dpi_year")) %>%  

  # merge dalp party connections to religion covariates (country-level, no year key)
    left_join(dalp_raw, by = c("resp_country_common" = "dalp_country_common")) %>%

  # generate on-cycle election indicator from nelda vars: 1 only when both the
  # last and the next election are flagged as on schedule
    mutate(nelda_last_and_next_election_oncycle = 
        (nelda_6_on_schedule_election_LAST == 1 & nelda_6_on_schedule_election_NEXT == 1)*1) %>% 

  # merge religious conflict history indicator
    left_join(relac_raw, by = c("resp_country_common", "last_election")) %>% 
  
  # filter people younger than 15 (but keep if resp_age is missing)
    filter(is.na(resp_age_recode) | resp_age_recode >= 15)  
```

# save analysis dataframe

```{r}
  # write the final merged analysis dataframe to disk
  saveRDS(
    object = long_stacked_w_largestrel_w_matches_w_dates_elecs_tranforms_neldacovs_extras,
    file = "../cleaned-data/analysis_df.rds")
```
